Function with requirements:
myfun <- function(y, m){ #Two parameters: A vector and sample size
x <- sample(y, size = m)
s <- NULL
d <- NULL
for (i in x) {
if ((i <= 150) && (i %% 4 == 0)) { #if value is <= 150 and div. 4
s <- c(s, i) #append to end of vector
}
else if ((i > 150) && (i %% 3 == 0)) {
d <- c(d, i) #append to end of vector
}
}
l <- list("Sample" = x, "s" = s, "d" = d, "Mean of s" = mean(s), "Var of s" = var(s),
"Mean of d" = mean(d), "Var of d" = var(d))
#lists are great! we can add all the names we need to the list
#so the output looks (somewhat) nice!
return(l)
}
vec <- c(1:300) #creating vector of natural numbers 1-300
myfun(vec, 40)
## $Sample
## [1] 10 102 77 16 130 112 193 114 125 183 78 103 38 104 12 65 74 283 20
## [20] 288 225 90 291 229 278 13 51 255 55 159 172 214 244 93 242 15 40 68
## [39] 58 82
##
## $s
## [1] 16 112 104 12 20 40 68
##
## $d
## [1] 183 288 225 291 255 159
##
## $`Mean of s`
## [1] 53.14286
##
## $`Var of s`
## [1] 1769.143
##
## $`Mean of d`
## [1] 233.5
##
## $`Var of d`
## [1] 2982.3
Normal with mean = 10, sd = 10: black
Chi-Sq. 5 df: green
Chi-Sq. 8 df: red
Chi-Sq. 18 df: blue
x <- seq(-5, 40, length = 500) #x range
dist.x <- dnorm(x, 10, 10) #normal mean = 10, variance = 10
degf <- c(5, 8, 18)
colors <- c("green","red", "blue", "black") # Set up the four colors
labels <- c("df=5", "df = 8", "df = 18", "normal") # Label the plots
plot(x, dist.x, ylim=c(0, 0.165), type="l", lty=2, lwd=2, xlab="x value",
ylab="Density", main= "Comparison of Normal and Chi-square Distributions")
# Plot normal density curve with Title'
# I added the y limit so we can see all the peaks of the graphs!
for (i in 1:length(degf)){
lines(x, dchisq(x, degf[i]), lty=1, lwd=2, col=colors[i]) # Plot chi-square
#distribution curves on plot
}
legend("topright", inset=0.01, title="Distributions", # Set up the title of legend
labels, lwd=2, lty=c(1, 1, 1, 2), col=colors) # Set up line types appearing in the legend
#importing dataset
library(data.table)
library(dplyr)
weather <- fread("weather.csv")
#converting Date to proper type
weather$Date <- as.Date(weather$Date, format = "%m/%d/%Y")
#arranging with respect to date (dplyr)
weather <- weather %>% arrange(Date)
library(knitr)
library(kableExtra)
heads <- rbind(head(weather[format(weather$Date, "%Y") == 2020, ], 5),
head(weather[format(weather$Date, "%Y") == 2021, ], 5),
head(weather[format(weather$Date, "%Y") == 2022, ], 5))
#import libraries
library(ggplot2)
library(ggridges)
#Temps:
#get subsets
Temp_sd <- weather[weather$`Parameter Name` == "Outdoor Temperature", ]
#rename arithmetic mean column to temp
colnames(Temp_sd)[17] = "Mean Temperature"
theme_update(plot.title = element_text(hjust = 0.5))
#this sets the default for our plots to center the titles, which is amazing
#because we don't have to add that little line of code to all of the
#plots for this project
Temps <- ggplot(Temp_sd, aes(x = `Mean Temperature`, y = as.factor(format(Date, "%Y/%m")), fill = stat(x))) +
geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
scale_fill_viridis_c(name = "Temp. [F]", option = "C") +
labs(title = 'Average Temps in SD 2020-2022',
y = "Year/Month")
The next code segments are nearly identical, so they are auto hidden. To view them, click the show button on the strip below.
#Wind Speed
WindSpd_sd <- weather[weather$`Parameter Name` == "Wind Speed - Resultant", ]
colnames(WindSpd_sd)[17] = "Mean Wind Speed"
WindSpd <- ggplot(WindSpd_sd, aes(x = `Mean Wind Speed`, y = as.factor(format(Date, "%Y/%m")), fill = stat(x))) +
geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
scale_fill_viridis_c(name = "Wind Spd. (knots)", option = "C") +
labs(title = 'Average Wind Speed (knots) in SD 2020-2022',
y = "Year/Month")
#Rel Humidity
RelHDP_sd <- weather[weather$`Parameter Name` == "Relative Humidity", ]
colnames(RelHDP_sd)[17] = "Mean % Humidity"
Humidity <- ggplot(RelHDP_sd, aes(x = `Mean % Humidity`, y = as.factor(format(Date, "%Y/%m")), fill = stat(x))) +
geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
scale_fill_viridis_c(name = "% Humidity", option = "C") +
labs(title = 'Average % Humidity in SD 2020-2022',
y = "Year/Month")
#Ozone
Ozone_sd <- weather[weather$`Parameter Name` == "Ozone", ]
colnames(Ozone_sd)[17] = "Mean Ozone"
Ozone <- ggplot(Ozone_sd, aes(x = `Mean Ozone`, y = as.factor(format(Date, "%Y/%m")), fill = stat(x))) +
geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
scale_fill_viridis_c(name = "Ozone (ppb)", option = "C") +
labs(title = 'Average Ozone Concentration (ppb) in SD 2020-2022',
y = "Year/Month")
#SO2
SO2_sd <- weather[weather$`Parameter Name` == "Sulfur dioxide", ]
colnames(SO2_sd)[17] = "Mean SO2"
SO2 <- ggplot(SO2_sd, aes(x = `Mean SO2`, y = as.factor(format(Date, "%Y/%m")), fill = stat(x))) +
geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
scale_fill_viridis_c(name = "SO2 (ppb)", option = "C") +
labs(title = 'Average Sulfur Dioxide Concentration (ppb) in SD 2020-2022',
y = "Year/Month")
#NO2
NO2_sd <- weather[weather$`Parameter Name` == "Nitrogen dioxide (NO2)", ]
colnames(NO2_sd)[17] = "Mean NO2"
NO2 <- ggplot(NO2_sd, aes(x = `Mean NO2`, y = as.factor(format(Date, "%Y/%m")), fill = stat(x))) +
geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
scale_fill_viridis_c(name = "NO2 (ppb)", option = "C") +
labs(title = 'Average Nitrogen Dioxide Concentration (ppb) in SD 2020-2022',
y = "Year/Month")
#CO
CO_sd <- weather[weather$`Parameter Name` == "Carbon monoxide", ]
colnames(CO_sd)[17] = "Mean CO"
CO <- ggplot(CO_sd, aes(x = `Mean CO`, y = as.factor(format(Date, "%Y/%m")), fill = stat(x))) +
geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
scale_fill_viridis_c(name = "CO (ppm)", option = "C") +
labs(title = 'Average Carbon Monoxide Concentration (ppm) in SD 2020-2022',
y = "Year/Month")
Our data for each respective variable (temp, wind, humidity, ozone, SO2, NO2, CO) is already neatly sorted from our last problem, so all we need to do is create ggplot boxplots for each respective dataframe (Temp_sd, Wind_sd, RelHDP_sd, Ozone_sd, SO2_sd, NO2_sd, CO_sd) for each year by creating yearly subsets:
#Creating the subsets for Temps
#Take a subset of the rows which have a year == desired_year
#format() syntax required for dates
Temp_sd2020 <- Temp_sd[format(Temp_sd$Date, "%Y") == 2020,]
Temp_sd2021 <- Temp_sd[format(Temp_sd$Date, "%Y") == 2021,]
Temp_sd2022 <- Temp_sd[format(Temp_sd$Date, "%Y") == 2022,]
The rest of the subset process is identical and is hidden. If you’d like to view it, click the Show button
#wind Speed
WindSpd_sd2020 <- WindSpd_sd[format(WindSpd_sd$Date, "%Y") == 2020,]
WindSpd_sd2021 <- WindSpd_sd[format(WindSpd_sd$Date, "%Y") == 2021,]
WindSpd_sd2022 <- WindSpd_sd[format(WindSpd_sd$Date, "%Y") == 2022,]
#humidity
RelHDP_sd2020 <- RelHDP_sd[format(RelHDP_sd$Date, "%Y") == 2020, ]
RelHDP_sd2021 <- RelHDP_sd[format(RelHDP_sd$Date, "%Y") == 2021, ]
RelHDP_sd2022 <- RelHDP_sd[format(RelHDP_sd$Date, "%Y") == 2022, ]
#ozone
Ozone_sd2020 <- Ozone_sd[format(Ozone_sd$Date, "%Y") == 2020, ]
Ozone_sd2021 <- Ozone_sd[format(Ozone_sd$Date, "%Y") == 2021, ]
Ozone_sd2022 <- Ozone_sd[format(Ozone_sd$Date, "%Y") == 2022, ]
#SO2
SO2_sd2020 <- SO2_sd[format(SO2_sd$Date, "%Y") == 2020, ]
SO2_sd2021 <- SO2_sd[format(SO2_sd$Date, "%Y") == 2021, ]
SO2_sd2022 <- SO2_sd[format(SO2_sd$Date, "%Y") == 2022, ]
#NO2
NO2_sd2020 <- NO2_sd[format(NO2_sd$Date, "%Y") == 2020, ]
NO2_sd2021 <- NO2_sd[format(NO2_sd$Date, "%Y") == 2021, ]
NO2_sd2022 <- NO2_sd[format(NO2_sd$Date, "%Y") == 2022, ]
#CO
CO_sd2020 <- CO_sd[format(CO_sd$Date, "%Y") == 2020, ]
CO_sd2021 <- CO_sd[format(CO_sd$Date, "%Y") == 2021, ]
CO_sd2022 <- CO_sd[format(CO_sd$Date, "%Y") == 2022, ]
Creating the boxplot for temps:
#Only weird thing here is the scale_discrete. I included this so
#the axis labels would be actual month names instead of the numbers
Temp_sd2020Box <- ggplot(Temp_sd2020, aes(x = format(Date, "%m"),
y = `Mean Temperature`, fill = format(Date, "%m"))) +
geom_boxplot() +
scale_fill_discrete(name = "Month", labels = month.abb[1:12]) +
scale_x_discrete(labels = month.abb[1:12]) +
labs(title = "Average Temps (F) 2020 SD", x = "Month")
#I AM SO HAPPY THE MONTH.ABB THING WORKED WITH THE DISCRETE LABELS!!!
once again the code for the rest just changes the variables, so it is auto-hidden
#rest of the temps boxes
Temp_sd2021Box <- ggplot(Temp_sd2021, aes(x = format(Date, "%m"),
y = `Mean Temperature`, fill = format(Date, "%m"))) +
geom_boxplot() +
scale_fill_discrete(name = "Month", labels = month.abb[1:12]) +
scale_x_discrete(labels = month.abb[1:12]) +
labs(title = "Average South Dakota Temps (Faren.) 2021", x = "Month")
Temp_sd2022Box <- ggplot(Temp_sd2022, aes(x = format(Date, "%m"),
y = `Mean Temperature`, fill = format(Date, "%m"))) +
geom_boxplot() +
scale_fill_discrete(name = "Month", labels = month.abb[1:12]) +
scale_x_discrete(labels = month.abb[1:12]) +
labs(title = "Average South Dakota Temps (Faren.) 2022", x = "Month")
#wind speed
WindSpd_sd2020Box <- ggplot(WindSpd_sd2020, aes(x = format(Date, "%m"),
y = `Mean Wind Speed`, fill = format(Date, "%m"))) +
geom_boxplot() +
scale_fill_discrete(name = "Month", labels = month.abb[1:12]) +
scale_x_discrete(labels = month.abb[1:12]) +
labs(title = "Average South Dakota Wind Speeds (knots) 2020", x = "Month")
WindSpd_sd2021Box <- ggplot(WindSpd_sd2021, aes(x = format(Date, "%m"),
y = `Mean Wind Speed`, fill = format(Date, "%m"))) +
geom_boxplot() +
scale_fill_discrete(name = "Month", labels = month.abb[1:12]) +
scale_x_discrete(labels = month.abb[1:12]) +
labs(title = "Average South Dakota Wind Speeds (knots) 2021", x = "Month")
WindSpd_sd2022Box <- ggplot(WindSpd_sd2022, aes(x = format(Date, "%m"),
y = `Mean Wind Speed`, fill = format(Date, "%m"))) +
geom_boxplot() +
scale_fill_discrete(name = "Month", labels = month.abb[1:12]) +
scale_x_discrete(labels = month.abb[1:12]) +
labs(title = "Average South Dakota Wind Speeds (knots) 2022", x = "Month")
#Humidity
RelHDP_sd2020Box <- ggplot(RelHDP_sd2020, aes(x = format(Date, "%m"),
y = `Mean % Humidity`, fill = format(Date, "%m"))) +
geom_boxplot() +
scale_fill_discrete(name = "Month", labels = month.abb[1:12]) +
scale_x_discrete(labels = month.abb[1:12]) +
labs(title = "Average South Dakota % Humidity 2020", x = "Month")
RelHDP_sd2021Box <- ggplot(RelHDP_sd2021, aes(x = format(Date, "%m"),
y = `Mean % Humidity`, fill = format(Date, "%m"))) +
geom_boxplot() +
scale_fill_discrete(name = "Month", labels = month.abb[1:12]) +
scale_x_discrete(labels = month.abb[1:12]) +
labs(title = "Average South Dakota % Humidity 2021", x = "Month")
RelHDP_sd2022Box <- ggplot(RelHDP_sd2022, aes(x = format(Date, "%m"),
y = `Mean % Humidity`, fill = format(Date, "%m"))) +
geom_boxplot() +
scale_fill_discrete(name = "Month", labels = month.abb[1:12]) +
scale_x_discrete(labels = month.abb[1:12]) +
labs(title = "Average South Dakota % Humidity 2022", x = "Month")
#ozone
Ozone_sd2020Box <- ggplot(Ozone_sd2020, aes(x = format(Date, "%m"),
y = `Mean Ozone`, fill = format(Date, "%m"))) +
geom_boxplot() +
scale_fill_discrete(name = "Month", labels = month.abb[1:12]) +
scale_x_discrete(labels = month.abb[1:12]) +
labs(title = "Average South Dakota Ozone Concentration (ppb) 2020", x = "Month")
Ozone_sd2021Box <- ggplot(Ozone_sd2021, aes(x = format(Date, "%m"),
y = `Mean Ozone`, fill = format(Date, "%m"))) +
geom_boxplot() +
scale_fill_discrete(name = "Month", labels = month.abb[1:12]) +
scale_x_discrete(labels = month.abb[1:12]) +
labs(title = "Average South Dakota Ozone Concentration (ppb) 2021", x = "Month")
Ozone_sd2022Box <- ggplot(Ozone_sd2022, aes(x = format(Date, "%m"),
y = `Mean Ozone`, fill = format(Date, "%m"))) +
geom_boxplot() +
scale_fill_discrete(name = "Month", labels = month.abb[1:12]) +
scale_x_discrete(labels = month.abb[1:12]) +
labs(title = "Average South Dakota Ozone Concentration (ppb) 2022", x = "Month")
#SO2
SO2_sd2020Box <- ggplot(SO2_sd2020, aes(x = format(Date, "%m"),
y = `Mean SO2`, fill = format(Date, "%m"))) +
geom_boxplot() +
scale_fill_discrete(name = "Month", labels = month.abb[1:12]) +
scale_x_discrete(labels = month.abb[1:12]) +
labs(title = "Average South Dakota Sulfur Dioxide Concentration (ppb) 2020", x = "Month")
SO2_sd2021Box <- ggplot(SO2_sd2021, aes(x = format(Date, "%m"),
y = `Mean SO2`, fill = format(Date, "%m"))) +
geom_boxplot() +
scale_fill_discrete(name = "Month", labels = month.abb[1:12]) +
scale_x_discrete(labels = month.abb[1:12]) +
labs(title = "Average South Dakota Sulfur Dioxide Concentration (ppb) 2021", x = "Month")
SO2_sd2022Box <- ggplot(SO2_sd2022, aes(x = format(Date, "%m"),
y = `Mean SO2`, fill = format(Date, "%m"))) +
geom_boxplot() +
scale_fill_discrete(name = "Month", labels = month.abb[1:12]) +
scale_x_discrete(labels = month.abb[1:12]) +
labs(title = "Average South Dakota Sulfur Dioxide Concentration (ppb) 2022", x = "Month")
#NO2
NO2_sd2020Box <- ggplot(NO2_sd2020, aes(x = format(Date, "%m"),
y = `Mean NO2`, fill = format(Date, "%m"))) +
geom_boxplot() +
scale_fill_discrete(name = "Month", labels = month.abb[1:12]) +
scale_x_discrete(labels = month.abb[1:12]) +
labs(title = "Average South Dakota Nitrogen Dioxide Concentration (ppb) 2020", x = "Month")
NO2_sd2021Box <- ggplot(NO2_sd2021, aes(x = format(Date, "%m"),
y = `Mean NO2`, fill = format(Date, "%m"))) +
geom_boxplot() +
scale_fill_discrete(name = "Month", labels = month.abb[1:12]) +
scale_x_discrete(labels = month.abb[1:12]) +
labs(title = "Average South Dakota Nitrogen Dioxide Concentration (ppb) 2021", x = "Month")
NO2_sd2022Box <- ggplot(NO2_sd2022, aes(x = format(Date, "%m"),
y = `Mean NO2`, fill = format(Date, "%m"))) +
geom_boxplot() +
scale_fill_discrete(name = "Month", labels = month.abb[1:12]) +
scale_x_discrete(labels = month.abb[1:12]) +
labs(title = "Average South Dakota Nitrogen Dioxide Concentration (ppb) 2022", x = "Month")
#CO
CO_sd2020Box <- ggplot(CO_sd2020, aes(x = format(Date, "%m"),
y = `Mean CO`, fill = format(Date, "%m"))) +
geom_boxplot() +
scale_fill_discrete(name = "Month", labels = month.abb[1:12]) +
scale_x_discrete(labels = month.abb[1:12]) +
labs(title = "Average South Dakota Carbon Monoxide Concentration (ppm) 2020", x = "Month")
CO_sd2021Box <- ggplot(CO_sd2021, aes(x = format(Date, "%m"),
y = `Mean CO`, fill = format(Date, "%m"))) +
geom_boxplot() +
scale_fill_discrete(name = "Month", labels = month.abb[1:12]) +
scale_x_discrete(labels = month.abb[1:12]) +
labs(title = "Average South Dakota Carbon Monoxide Concentration (ppm) 2021", x = "Month")
CO_sd2022Box <- ggplot(CO_sd2022, aes(x = format(Date, "%m"),
y = `Mean CO`, fill = format(Date, "%m"))) +
geom_boxplot() +
scale_fill_discrete(name = "Month", labels = month.abb[1:12]) +
scale_x_discrete(labels = month.abb[1:12]) +
labs(title = "Average South Dakota Carbon Monoxide Concentration (ppm) 2022", x = "Month")
Note about CO 2022. The Data for that set is incomplete, which is why it only goes to August.
Interactive boxplot for meteorological data. We created the yearly boxplots in the last problem, so all we need to do is simply use the ggplotly the boxplots:
#ggplotly function converts ggplot directly to interactive plotly plot
ggplotly(Temp_sd2020Box)
ggplotly(Temp_sd2021Box)
ggplotly(Temp_sd2022Box)
The rest are redundant, click Show to see code
#wind speed
ggplotly(WindSpd_sd2020Box)
ggplotly(WindSpd_sd2021Box)
ggplotly(WindSpd_sd2022Box)
#humidity
ggplotly(RelHDP_sd2020Box)
ggplotly(RelHDP_sd2021Box)
ggplotly(RelHDP_sd2022Box)
#exercise 6: counting number of occurences
#(results asis for kable)
library(kableExtra)
#freezing (I'm gonna use %>% for practice)
supercold2020 <- Temp_sd2020 %>%
select(`Mean Temperature`) %>% #select only the avg temp column
filter(`Mean Temperature` < 32) %>% #filter the column
mutate(Year = 2020) %>% #add an identifyer column for the year
mutate(Type = "avg below freezing") %>% #identifyer column for type of count
select(Year, Type) #we can select just the year and type because we are only
#concerned with the counts!
#man I really like dyplr!! I was skeptical at first, but this stuff is great!
supercold2021 <- Temp_sd2021 %>%
select(`Mean Temperature`) %>%
filter(`Mean Temperature` < 32) %>%
mutate(Year = 2021) %>%
mutate(Type = "avg below freezing") %>%
select(Year, Type)
supercold2022 <- Temp_sd2022 %>%
select(`Mean Temperature`) %>%
filter(`Mean Temperature` < 32) %>%
mutate(Year = 2022) %>%
mutate(Type = "avg below freezing") %>%
select(Year, Type)
#high above 80:
hot_hot_HOT2020 <- Temp_sd2020 %>%
select(`1st Max Value`) %>%
filter(`1st Max Value` > 80) %>%
mutate(Year = 2020) %>%
mutate(Type = "HI. hotter than 80") %>%
select(Year, Type)
hot_hot_HOT2021 <- Temp_sd2021 %>%
select(`1st Max Value`) %>%
filter(`1st Max Value` > 80) %>%
mutate(Year = 2021) %>%
mutate(Type = "HI. hotter than 80") %>%
select(Year, Type)
hot_hot_HOT2022 <- Temp_sd2022 %>%
select(`1st Max Value`) %>%
filter(`1st Max Value` > 80) %>%
mutate(Year = 2022) %>%
mutate(Type = "HI. hotter than 80") %>%
select(Year, Type)
temp_counts_df <- rbind(supercold2020, supercold2021, supercold2022,
hot_hot_HOT2020, hot_hot_HOT2021, hot_hot_HOT2022)
counts_of_temps <- table(temp_counts_df)
counts_of_temps %>% #using kable just to make table look a little nicer
knitr::kable(caption =
"Counts of Daily Avg and Hi. Temps in SD 2020-2022") %>%
kableExtra::kable_styling(bootstrap_options = c("hover", "striped"), full_width = FALSE)
| avg below freezing | HI. hotter than 80 | |
|---|---|---|
| 2020 | 171 | 159 |
| 2021 | 126 | 130 |
| 2022 | 165 | 94 |
First rows of Summer and Winter
Summer <- subset(weather, format(Date, "%m") %in% c('06', '07', '08'))
#%in% operator lets us use a vector to return logical values for the subset!
Winter <- subset(weather, format(Date, "%m") %in% c('01', '02', '12'))
Summer <- Summer %>% mutate(Season = "Summer")
Winter <- Winter %>% mutate(Season = "Winter")
#select a few particular columns to print to neat table
SeasonalHeads <- rbind(head(Summer, 10), head(Winter, 10)) %>%
select(Season, Date, `Parameter Name`, `Arithmetic Mean`,
`Units of Measure`)
SeasonalHeads %>%
knitr::kable(caption = "First few rows of Summer and Winter") %>%
kableExtra::kable_styling(bootstrap_options = c("hover", "striped", "bordered"), full_width = TRUE) %>%
pack_rows("Summer", 1, 10, color = "darkorange") %>%
pack_rows("Winter", 11, 20, color = "dodgerblue")
| Season | Date | Parameter Name | Arithmetic Mean | Units of Measure |
|---|---|---|---|---|
| Summer | ||||
| Summer | 2020-06-01 | Relative Humidity | 48.791667 | Percent relative humidity |
| Summer | 2020-06-01 | Outdoor Temperature | 76.208333 | Degrees Fahrenheit |
| Summer | 2020-06-01 | Outdoor Temperature | 75.625000 | Degrees Fahrenheit |
| Summer | 2020-06-01 | Wind Speed - Resultant | 6.125000 | Knots |
| Summer | 2020-06-01 | Wind Direction - Resultant | 222.250000 | Degrees Compass |
| Summer | 2020-06-01 | Wind Speed - Resultant | 6.691667 | Knots |
| Summer | 2020-06-01 | Wind Direction - Resultant | 210.362500 | Degrees Compass |
| Summer | 2020-06-01 | Wind Speed - Resultant | 5.116667 | Knots |
| Summer | 2020-06-01 | Wind Direction - Resultant | 232.820833 | Degrees Compass |
| Summer | 2020-06-01 | Carbon monoxide | 0.187333 | Parts per million |
| Winter | ||||
| Winter | 2020-01-01 | Relative Humidity | 52.541667 | Percent relative humidity |
| Winter | 2020-01-01 | Relative Humidity | 87.750000 | Percent relative humidity |
| Winter | 2020-01-01 | Outdoor Temperature | 36.458333 | Degrees Fahrenheit |
| Winter | 2020-01-01 | Outdoor Temperature | 33.000000 | Degrees Fahrenheit |
| Winter | 2020-01-01 | Wind Speed - Resultant | 3.825000 | Knots |
| Winter | 2020-01-01 | Wind Direction - Resultant | 267.541667 | Degrees Compass |
| Winter | 2020-01-01 | Wind Speed - Resultant | 7.866667 | Knots |
| Winter | 2020-01-01 | Wind Direction - Resultant | 140.808333 | Degrees Compass |
| Winter | 2020-01-01 | Wind Speed - Resultant | 3.512500 | Knots |
| Winter | 2020-01-01 | Wind Direction - Resultant | 225.066667 | Degrees Compass |
Dimensions
#Delete season column so dimension is proper
Summer <- Summer %>% select(!Season)
Winter <- Winter %>% select(!Season)
#creating table of the dimensions
Dimensions <- data.frame(
Season = c("Summer", "Winter"), #creating col of vector in order
"num records" = c(dim(Summer)[1], dim(Winter)[1]), #select col one of dim()
"num variables" = c(dim(Summer)[2], dim(Winter)[2])
)
Dimensions %>%
knitr::kable(caption = "Dimenions of Summer and Winter Subsets") %>%
kableExtra::kable_styling(bootstrap_options = c("hover", "striped"), full_width = FALSE, position = "center")
| Season | num.records | num.variables |
|---|---|---|
| Summer | 7360 | 29 |
| Winter | 7654 | 29 |
#daily wind for summer
#creating subsets so problem is easier
summer2020 <- Summer %>%
filter(format(Date, "%Y") == 2020)
summer2021 <- Summer %>%
filter(format(Date, "%Y") == 2021)
summer2022 <- Summer %>%
filter(format(Date, "%Y") == 2022)
winter2020 <- Winter %>%
filter(format(Date, "%Y") == 2020)
winter2021 <- Winter %>%
filter(format(Date, "%Y") == 2021)
winter2022 <- Winter %>%
filter(format(Date, "%Y") == 2022)
#we're actually gonna bypass creating two vectors for each one
#with the use of dplyr's summarise. We can calculate the mean using
#summarise because it will automatically divide total winds / numrecords
#filter winds, select mean col vector, apply summarise(mean())
avgwinds_summer2020 <- summer2020 %>%
filter(`Parameter Name` == "Wind Speed - Resultant") %>%
select(`Arithmetic Mean`) %>%
summarise(mean(`Arithmetic Mean`, na.rm=TRUE)) %>%
as.numeric() #convert dataframe to single number
avgwinds_winter2020 <- winter2020 %>%
filter(`Parameter Name` == "Wind Speed - Resultant") %>%
select(`Arithmetic Mean`) %>%
summarise(mean(`Arithmetic Mean`, na.rm=TRUE)) %>%
as.numeric()
#this strategy also cleans the data while we calculate everything
#because we set na.rm to true for calculating the mean
Rest of the code auto-hidden
#rest of the winds
avgwinds_summer2021 <- summer2021 %>%
filter(`Parameter Name` == "Wind Speed - Resultant") %>%
select(`Arithmetic Mean`) %>%
summarise(mean(`Arithmetic Mean`, na.rm=TRUE)) %>%
as.numeric()
avgwinds_winter2021 <- winter2021 %>%
filter(`Parameter Name` == "Wind Speed - Resultant") %>%
select(`Arithmetic Mean`) %>%
summarise(mean(`Arithmetic Mean`, na.rm=TRUE)) %>%
as.numeric()
avgwinds_summer2022 <- summer2022 %>%
filter(`Parameter Name` == "Wind Speed - Resultant") %>%
select(`Arithmetic Mean`) %>%
summarise(mean(`Arithmetic Mean`, na.rm=TRUE)) %>%
as.numeric()
avgwinds_winter2022 <- winter2022 %>%
filter(`Parameter Name` == "Wind Speed - Resultant") %>%
select(`Arithmetic Mean`) %>%
summarise(mean(`Arithmetic Mean`, na.rm=TRUE)) %>%
as.numeric()
#temps
avgtemps_summer2020 <- summer2020 %>%
filter(`Parameter Name` == "Outdoor Temperature") %>%
select(`Arithmetic Mean`) %>%
summarise(mean(`Arithmetic Mean`, na.rm=TRUE)) %>%
as.numeric()
avgtemps_winter2020 <- winter2020 %>%
filter(`Parameter Name` == "Outdoor Temperature") %>%
select(`Arithmetic Mean`) %>%
summarise(mean(`Arithmetic Mean`, na.rm=TRUE)) %>%
as.numeric()
avgtemps_summer2021 <- summer2021 %>%
filter(`Parameter Name` == "Outdoor Temperature") %>%
select(`Arithmetic Mean`) %>%
summarise(mean(`Arithmetic Mean`, na.rm=TRUE)) %>%
as.numeric()
avgtemps_winter2021 <- winter2021 %>%
filter(`Parameter Name` == "Outdoor Temperature") %>%
select(`Arithmetic Mean`) %>%
summarise(mean(`Arithmetic Mean`, na.rm=TRUE)) %>%
as.numeric()
avgtemps_summer2022 <- summer2022 %>%
filter(`Parameter Name` == "Outdoor Temperature") %>%
select(`Arithmetic Mean`) %>%
summarise(mean(`Arithmetic Mean`, na.rm=TRUE)) %>%
as.numeric()
avgtemps_winter2022 <- winter2022 %>%
filter(`Parameter Name` == "Outdoor Temperature") %>%
select(`Arithmetic Mean`) %>%
summarise(mean(`Arithmetic Mean`, na.rm=TRUE)) %>%
as.numeric()
#humidity
avghumperc_summer2020 <- summer2020 %>%
filter(`Parameter Name` == "Relative Humidity") %>%
select(`Arithmetic Mean`) %>%
summarise(mean(`Arithmetic Mean`, na.rm=TRUE)) %>%
as.numeric()
avghumperc_winter2020 <- winter2020 %>%
filter(`Parameter Name` == "Relative Humidity") %>%
select(`Arithmetic Mean`) %>%
summarise(mean(`Arithmetic Mean`, na.rm=TRUE)) %>%
as.numeric()
avghumperc_summer2021 <- summer2021 %>%
filter(`Parameter Name` == "Relative Humidity") %>%
select(`Arithmetic Mean`) %>%
summarise(mean(`Arithmetic Mean`, na.rm=TRUE)) %>%
as.numeric()
avghumperc_winter2021 <- winter2021 %>%
filter(`Parameter Name` == "Relative Humidity") %>%
select(`Arithmetic Mean`) %>%
summarise(mean(`Arithmetic Mean`, na.rm=TRUE)) %>%
as.numeric()
avghumperc_summer2022 <- summer2022 %>%
filter(`Parameter Name` == "Relative Humidity") %>%
select(`Arithmetic Mean`) %>%
summarise(mean(`Arithmetic Mean`, na.rm=TRUE)) %>%
as.numeric()
avghumperc_winter2022 <- winter2022 %>%
filter(`Parameter Name` == "Relative Humidity") %>%
select(`Arithmetic Mean`) %>%
summarise(mean(`Arithmetic Mean`, na.rm=TRUE)) %>%
as.numeric()
Creating and printing the table
#just follow the pattern of combining the vectors for each col in order of
#summer2020, summer2021, summer2022, winter2020, ...
tableframe <- data.frame(
"season-year" = c("Summer-2020", "Summer-2021", "Summer-2022",
"Winter-2020", "Winter-2021", "Winter-2022"),
"avg temps"=c(avgtemps_summer2020, avgtemps_summer2021, avgtemps_summer2022,
avgtemps_winter2020, avgtemps_winter2021, avgtemps_winter2022),
"avg windSpd"=c(avgwinds_summer2020, avgwinds_summer2021, avgwinds_summer2022,
avgwinds_winter2020, avgwinds_winter2021, avgwinds_winter2022),
"avg humidity"=c(avghumperc_summer2020, avghumperc_summer2021,
avghumperc_summer2022, avghumperc_winter2020,
avghumperc_winter2021, avghumperc_winter2022)
)
tableframe %>%
knitr::kable(caption = "Seasonal Averages for Daily Avg Temps, Winds, and Humidity %") %>%
kableExtra::kable_styling(bootstrap_options = c("hover", "striped", "bordered"), full_width = TRUE) %>%
pack_rows("Summer", 1, 3, color = "darkorange") %>%
pack_rows("Winter", 4, 6, color = "dodgerblue")
| season.year | avg.temps | avg.windSpd | avg.humidity |
|---|---|---|---|
| Summer | |||
| Summer-2020 | 72.32543 | 4.806303 | 50.04484 |
| Summer-2021 | 74.55649 | 4.849363 | 59.71301 |
| Summer-2022 | 73.71784 | 5.500201 | 44.55435 |
| Winter | |||
| Winter-2020 | 30.10824 | 4.986248 | 68.11296 |
| Winter-2021 | 25.91245 | 5.029815 | 69.85231 |
| Winter-2022 | 23.86079 | 6.884367 | 64.27558 |
Daily differences of High and Mean Temperatures for the Summers of 2020-2022
#I'm going to do my general filtering and selecting that I have done before
#and then mutate an extra column which is the vector difference of
#hi and mean
tempdifs2020 <- summer2020 %>%
filter(`Parameter Name` == "Outdoor Temperature") %>%
mutate("season-year" = "summer-2020") %>% #appending a group col
mutate("hi - avg" = `1st Max Value` - `Arithmetic Mean`) %>%
select("season-year", Date, "hi - avg") %>%
rename("date" = Date) %>%
arrange(`hi - avg`) #sort ascending order by delta
tempdifs2021 <- summer2021 %>%
filter(`Parameter Name` == "Outdoor Temperature") %>%
mutate("season-year" = "summer-2021") %>% #appending a group col
mutate("hi - avg" = `1st Max Value` - `Arithmetic Mean`) %>%
select("season-year", Date, "hi - avg") %>%
rename("date" = Date) %>%
arrange(`hi - avg`) #sort ascending order by delta
tempdifs2022 <- summer2022 %>%
filter(`Parameter Name` == "Outdoor Temperature") %>%
mutate("season-year" = "summer-2022") %>% #appending a group col
mutate("hi - avg" = `1st Max Value` - `Arithmetic Mean`) %>%
select("season-year", Date, "hi - avg") %>%
rename("date" = Date) %>%
arrange(`hi - avg`) #sort ascending order by delta
#append heads to a dataframe
tableframe2 <- rbind(head(tempdifs2020, 10),
head(tempdifs2021, 10),
head(tempdifs2022, 10))
The code for the table formatting and kable manipulation is auto-hidden as it is a bit of an eyesore, but you are more than welcome to view it with the Show button
#knit data frame to neat kable table
tableframe2 %>%
knitr::kable(caption = "Heads of Daily Differences of Hi and Avg. Temps for the Summers of 2020-2022 in SD") %>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"), full_width = TRUE) %>%
pack_rows("Summer 2020", 1, 10, color = 'sienna') %>%
pack_rows("Summer 2021", 11, 20, color = 'darkorange') %>%
pack_rows("Summer 2022", 21, 30, color = 'sandybrown') %>%
row_spec(c(1,3,5,7,9), color = 'gray', background= 'bisque') %>%
row_spec(c(2,4,6,8,10), color = 'gray15', background='burlywood') %>%
row_spec(c(11,13,15,17,19), color = 'gray', background='antiquewhite') %>%
row_spec(c(12,14,16,18,20), color = 'gray15', background='peachpuff') %>%
row_spec(c(21, 23, 25, 27, 29), color = 'gray', background='blanchedalmond') %>%
row_spec(c(22,24,26,28,30), color = 'gray15', background='wheat') %>%
column_spec(3, color = 'black')
| season-year | date | hi - avg |
|---|---|---|
| Summer 2020 | ||
| summer-2020 | 2020-07-29 | 4.583333 |
| summer-2020 | 2020-07-18 | 4.916667 |
| summer-2020 | 2020-06-02 | 5.125000 |
| summer-2020 | 2020-06-22 | 5.291667 |
| summer-2020 | 2020-06-01 | 5.375000 |
| summer-2020 | 2020-06-30 | 5.500000 |
| summer-2020 | 2020-06-01 | 5.791667 |
| summer-2020 | 2020-06-02 | 5.875000 |
| summer-2020 | 2020-06-21 | 6.083333 |
| summer-2020 | 2020-08-28 | 6.458333 |
| Summer 2021 | ||
| summer-2021 | 2021-07-07 | 3.666667 |
| summer-2021 | 2021-07-10 | 3.791667 |
| summer-2021 | 2021-06-26 | 4.833333 |
| summer-2021 | 2021-06-20 | 4.958333 |
| summer-2021 | 2021-07-30 | 5.250000 |
| summer-2021 | 2021-08-07 | 5.291667 |
| summer-2021 | 2021-08-20 | 5.333333 |
| summer-2021 | 2021-08-20 | 6.333333 |
| summer-2021 | 2021-08-12 | 6.541667 |
| summer-2021 | 2021-06-24 | 6.583333 |
| Summer 2022 | ||
| summer-2022 | 2022-08-25 | 3.416667 |
| summer-2022 | 2022-07-26 | 4.708333 |
| summer-2022 | 2022-07-23 | 4.833333 |
| summer-2022 | 2022-06-30 | 6.208333 |
| summer-2022 | 2022-08-31 | 6.250000 |
| summer-2022 | 2022-06-05 | 6.458333 |
| summer-2022 | 2022-06-01 | 6.791667 |
| summer-2022 | 2022-08-06 | 7.333333 |
| summer-2022 | 2022-08-15 | 7.416667 |
| summer-2022 | 2022-06-11 | 7.500000 |
Here is the link to my Shiny App: Shiny App
I also have the code here, but it’s auto-hidden. If you’d like to see the code I used to build the shiny app, click Show
library(shiny)
library(shinydashboard)
library(plotly)
library(dplyr)
library(plyr)
library(data.table)
#reading in weather like before
weather <- fread("weather.csv")
weather$Date <- as.Date(weather$Date, format = "%m/%d/%Y")
weather <- weather %>% arrange(Date)
weather2020 <- weather %>%
filter(format(Date, "%Y") == 2020)
weather2021 <- weather %>%
filter(format(Date, "%Y") == 2021)
weather2022 <- weather %>%
filter(format(Date, "%Y") == 2022)
variable_mapping <- list(
"Temperature" = 62101,
"Wind Speed" = 61103,
"Humidity" = 62201,
"Ozone" = 44201,
"Sulfur Dioxide" = 42401,
"Nitrogen Dioxide" = 42602,
"Carbon Monoxide" = 42101
)
reverse_mapping <- list(
"62101" = "Temperature",
"61103" = "Wind Speed",
"62201" = "Humidity",
"44201" = "Ozone",
"42401" = "Sulfur Dioxide",
"42602" = "Nitrogen Dioxide",
"42101" = "Carbon Monoxide"
)
data_list <- list(
"2020" = weather2020,
"2021" = weather2021,
"2022" = weather2022
)
ui <- navbarPage(
title = "Interactive Yearly Weather Histograms",
sidebarLayout(
sidebarPanel(
width = 3,
h1("Select Year and Weather Variable"),
selectInput(
inputId = "year_choice",
label = "Select Year",
choices = c("2020", "2021", "2022")
),
selectInput(
inputId = "var_choice",
label = "Select Variable",
choices = c("Temperature", "Wind Speed", "Humidity",
"Ozone", "Sulfur Dioxide", "Nitrogen Dioxide",
"Carbon Monoxide")
),
sliderInput(
inputId = "bins",
label = "Number of Bins:",
min = 5,
max = 50,
value = 30
)
),
mainPanel(
h1("Histogram"),
plotOutput("histogram", height = 700)
)
)
)
server <- function(input, output) {
output$histogram <- renderPlot({
selected_year <- input$year_choice
selected_variable <- variable_mapping[[input$var_choice]]
data <- data_list[[selected_year]] %>%
filter(`Parameter Code` == selected_variable) %>%
select(`Arithmetic Mean`)
# I was getting weird bugs, so I just created a whole new column
# so that histogram would be passed in a proper numeric vector
#each time
data$Arithmetic_Mean <- as.numeric(data$`Arithmetic Mean`)
xlim <- c(min(data$Arithmetic_Mean), max(data$Arithmetic_Mean))
bins <- seq(xlim[1], xlim[2], length.out = input$bins + 1)
hist(data$Arithmetic_Mean, breaks = bins, col = "lightsteelblue", border = "midnightblue",
xlab = reverse_mapping[[as.character(selected_variable)]],
main = paste0("Histogram of ", reverse_mapping[[as.character(selected_variable)]], " for ", selected_year))
})
}
shinyApp(ui, server)
This marks the end of my final exam. I thoroughly enjoyed the challenge you put together for us to close off the semester. I feel WAY more comfortable with R now after writing this exam. Thank you for the semester!